﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_cityind_top_mkt_sum.sas 5 & 
*/

/*
Author: Adarsh Kumar
Objective: Calculate City-Industry Statistics, but define top industry-firms (nationally) based on 
	   mkts/firm
*/
libname hr "/projects/hsieh_project/proj_201809/data/";

%Let dir_out = /projects/hsieh_project/proj_201809/data/;

/*
================================================================================
Load Data
*/

%include "/projects/hsieh_project/proj_201809/code_1_data/m_read.sas" /source2;
%m_read(param_dev=1,param_lyear=%quote(1977,2013),param_czone=0,param_msa1983=1,param_msacz=0,param_drop=1);

%include "/projects/hsieh_project/code_0_general/m_perc_by_var.sas" /source2;

/*
================================================================================

/*
Macro that aggregates data to the city-industry level. 

dt_in = name of input data
dt_out = name of output data
c_city = variable for city/geo
l_ind_perc= list of percentiles to create statistics for (e.g. top 10 percentile ind-firms defined based on mkt/firm)
l_cind_rank= list of ranks to create statistics for (e.g. top 1 firm in the city-industry)
ivar = mkt variable to use to find rank
ivaro = mkt variable label to apply
*/

%macro cind_sum_top_mkt(dt_in= , dt_out= , c_city= , l_ind_perc= , ivar= , ivaro= );

%put --------------------------------------------------------------------------------;
%put &ivar.;

data dt_proc;
  set &dt_in.(keep=year &c_city. ch_ind sector firmnum lbdid emp est pay runif);
run;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city-firm level 
*/

proc sort data=dt_proc; 
  by year sector ch_ind &c_city. firmnum;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind &c_city. firmnum;
  output out=dt_indf(drop=_type_ _freq_) sum(emp est pay)=emp_cind est_cind pay_cind;
run;

data dt_indf;
  set dt_indf;
  n_cind = 1;
run;

/* 
Assigning ranks to industry-firms based on ivar (markets)
*/ 

proc sort data=dt_proc; by year ch_ind firmnum &ivar.;

data dt_var;
  set dt_proc;
  by year ch_ind firmnum &ivar.;
  if first.&ivar.; 
  &ivaro.=1;
run;

%put ----------------------------------------;
%put Define Top Firms;

/* 
--------------------------------------------------------------------------------
Define top firms based on markets (note that this differs from the employment based definition)
*/
proc sort data=dt_var; by year ch_ind firmnum &ivar.; /* At geo level */

%m_perc_by_var(ds_in=dt_var, ds_out=dt_perc, var_in=&ivaro., var_by=%bquote(year ch_ind), var_unit=firmnum, var_out=mkt, var_runif=runif);

/* 
dt_perc is at the city-industry-firm level. We're merging in industry-firm ranks (by mkts). 
*/

proc sort data=dt_perc; by year ch_ind firmnum;
proc sort data=dt_indf; by year ch_ind firmnum;

data dt_indf;
  merge dt_indf dt_perc;
  by year ch_ind firmnum;
run;

/*
--------------------------------------------------------------------------------
Calculate total employment of all, top x% firms in each c_city
*/

%Let l_cind_var_0=emp_cind est_cind pay_cind n_cind;

%Let l_ind_var= ;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_ind_perc.), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_ind_perc.), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_indf;
    set dt_indf;
    emp_ind_&ivaro._&i_perc. = 0;
    est_ind_&ivaro._&i_perc. = 0;
    pay_ind_&ivaro._&i_perc. = 0;
    n_ind_&ivaro._&i_perc. = 0;
  run;
  
  data dt_indf;
    set dt_indf;
    if mkt_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	emp_ind_&ivaro._&i_perc. = emp_cind;
	est_ind_&ivaro._&i_perc. = est_cind;
	pay_ind_&ivaro._&i_perc. = pay_cind;
	n_ind_&ivaro._&i_perc. = 1;
      end;
  run;
  
  %Let l_ind_var = &l_ind_var. emp_ind_&ivaro._&i_perc. est_ind_&ivaro._&i_perc. pay_ind_&ivaro._&i_perc. n_ind_&ivaro._&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Industry Firms;
%put &l_ind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city level 
*/

proc sort data=dt_indf; by year sector ch_ind &c_city.;

proc means data=dt_indf noprint;
  by year sector ch_ind &c_city.;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_cind_var_0. &l_ind_var.)=&l_cind_var_0. &l_ind_var.;
run;

%mend;

/* Call to SAS macro */

%cind_sum_top_mkt(dt_in= lbd, dt_out= dt_cityind_est, c_city= msa1983, l_ind_perc= 10 1, ivar= lbdid, ivaro= n_est);
%cind_sum_top_mkt(dt_in= lbd, dt_out= dt_cityind_msa1983, c_city= msa1983, l_ind_perc= 10 1, ivar= msa1983, ivaro= n_msa1983);

/*Export */
proc export data=dt_cityind_est outfile="&dir_out./cityind_top_mkt_est_sum.dta" replace;
run;

proc export data=dt_cityind_msa1983 outfile="&dir_out./cityind_top_mkt_msa1983_sum.dta" replace;
run;





